In [14]:
class Config(object):
    debug_level = 'debug1' #debug0
    
    device  = 'cuda' #'cpu'
    num_workers = 2
  
    pretrain_dir = '/kaggle/input/drone-image-segmentation-notebook-demo'
    image_dir = '/kaggle/input/semantic-drone-dataset/dataset/semantic_drone_dataset/original_images'
    mask_dir  = '/kaggle/input/semantic-drone-dataset/dataset/semantic_drone_dataset/label_images_semantic'
    data_dir  = '/kaggle/input/semantic-drone-dataset'
    
    num_label=24
    ignore_label=[0,23]
    width=512
    height=512
        
    logit_scale=4
    
    
    start_lr = 1e-3  
    batch_size = 5   # 32 
    num_epoch = 10
    
    fold = 0

    iter_log   = 1 #log at every 1 epoch
    iter_valid = 1
    iter_save  = 1
    fold_dir   ='/kaggle/working/train_result'
    initial_checkpoint=None
    
     
CFG = Config()
In [15]:
import sys
sys.path.append('/kaggle/input/drone-image-segmentation-notebook-demo')
from mix_transformer import *

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torch.utils.data.sampler import *
import torch.optim as optim

from timeit import default_timer as timer
import time

from glob import glob
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import random

import os
os.makedirs(CFG.fold_dir,exist_ok=True)


def set_all_random_seed(seed = int(time.time())):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    return seed
    
print(f'random seed {set_all_random_seed()}')
print('import ok !!!')
random seed 1679476942
import ok !!!
In [16]:
def check_mit_transformer(): 
    encoder = mit_b0()
    image = torch.rand(1,3,224,224)
    feature_map = encoder(image)
    
    print('image:')
    print(image.shape)
    print('feature_map:')
    [print(f.shape) for f in feature_map]
    
if CFG.debug_level in ['debug1']:
    check_mit_transformer()
image:
torch.Size([1, 3, 224, 224])
feature_map:
torch.Size([1, 32, 56, 56])
torch.Size([1, 64, 28, 28])
torch.Size([1, 160, 14, 14])
torch.Size([1, 256, 7, 7])
In [17]:
class MyUpSample(nn.Module):
    def __init__(self, scale_factor=2):
        super().__init__() 
        assert (scale_factor != 1)
        self.scale_factor = scale_factor

    def forward(self, x):
        x = F.interpolate(x, scale_factor=self.scale_factor, mode='bilinear', align_corners=False)
        return x


class MyDecoder(nn.Module):
    def __init__(
        self,
        encoder_dim=[32, 64, 160, 256],
        decoder_dim=256,
    ):
        super().__init__()
        L = len(encoder_dim)
        self.conv = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(encoder_dim[i],decoder_dim, kernel_size=1),
                nn.Identity() if i==0 else MyUpSample(2**i)
            )for i in range(L)
        ]) 
        self.fuse = nn.Conv2d(4*decoder_dim,decoder_dim, kernel_size=1)
 
    def forward(self, feature_map): 
        x = []
        L = len(feature_map)
        for i in range(L):
            x.append(self.conv[i](feature_map[i]))

        x = torch.cat(x,1)
        x = self.fuse(x)
        return x

    
    
class Net(nn.Module):
    def load_pretrain(self,):
        pass
        pretrain_file = f'{CFG.pretrain_dir}/mit_b0.pth'
        print(f'load {pretrain_file}')
        state_dict = torch.load(pretrain_file,map_location=lambda storage, loc: storage)
        print(self.encoder.load_state_dict(state_dict, strict=False))  # True
     
    def __init__( self,):
        super(Net, self).__init__() 
        self.register_buffer('mean', torch.FloatTensor([0.5, 0.5, 0.5]).reshape(1, 3, 1, 1))
        self.register_buffer('std', torch.FloatTensor([0.5, 0.5, 0.5]).reshape(1, 3, 1, 1))

        self.output_type = ['inference', 'loss']
        self.dropout = nn.Dropout(0.1)

        self.arch = 'mit_b0'
        self.encoder = mit_b0()
        encoder_sim = self.encoder.embed_dims
        #[64, 128, 320, 512]

        self.decoder = MyDecoder(
            encoder_dim = encoder_sim,
            decoder_dim=256, 
        )
 
        self.logit = nn.Sequential(
            nn.Conv2d(256, CFG.num_label, kernel_size = 1, padding = 0),
        )

    def forward(self, batch):
        x = batch['image']
      
        x = (x - self.mean) / self.std #broadcast to 3 channels
        B,C,H,W = x.shape
        #print('x', x.shape)

        encoder = self.encoder.forward_features(x)
        #print(self.encoder.embed_dims)
        #print('encoder', [f.shape for f in encoder])

        last = self.decoder(encoder)
        #print('last', last.shape)
 
        last = self.dropout(last)
        logit = self.logit(last)

        output = {}
        if 'loss' in self.output_type:
            output['label_loss'] = F_cross_entropy_2d(logit, batch['mask'])

        if 'inference' in self.output_type:
            probability = torch.softmax(logit, 1)
            probability = F.interpolate(probability,size=None,scale_factor=CFG.logit_scale,mode='bilinear',align_corners=False)
            output['probability'] = probability

        return output
    

def F_cross_entropy_2d(logit, truth):
    truth = F.interpolate(truth.float(),size=None,scale_factor=1/CFG.logit_scale,mode='nearest').long()
    truth = truth.reshape(-1)
    logit = logit.permute(0,2,3,1).reshape(-1,CFG.num_label)
    assert(len(CFG.ignore_label)==2) #hard code
    valid = (truth!=CFG.ignore_label[0]) & (truth!=CFG.ignore_label[1])
    loss = F.cross_entropy(logit[valid], truth[valid], label_smoothing=0.1)  
    return loss

def run_check_net():
    image_height, image_width = 224,224 
    batch_size = 2
    
    h,w = image_height, image_width 
    #---
    batch = {
        'image' : torch.from_numpy( np.random.uniform(-1,1,(batch_size,1,h,w))).to(CFG.device).float(),
        'mask'  : torch.from_numpy( np.random.choice(17,(batch_size,1,h,w))).to(CFG.device).long(),
    }

    net = Net().to(CFG.device) 
    net.load_pretrain()

    with torch.no_grad():
        with torch.cuda.amp.autocast(enabled=True):
            output = net(batch) #call forward function

    print('batch')
    for k, v in batch.items():
        print('%32s :' % k, v.shape)

    print('output')
    for k, v in output.items():
        if 'loss' not in k:
            print('%32s :' % k, v.shape)
    for k, v in output.items():
        if 'loss' in k:
            print('%32s :' % k, v.item())

if CFG.debug_level in ['debug1']:
    run_check_net()
    
load /kaggle/input/drone-image-segmentation-notebook-demo/mit_b0.pth
_IncompatibleKeys(missing_keys=[], unexpected_keys=['head.weight', 'head.bias'])
batch
                           image : torch.Size([2, 1, 224, 224])
                            mask : torch.Size([2, 1, 224, 224])
output
                     probability : torch.Size([2, 24, 224, 224])
                      label_loss : 3.188117504119873
In [18]:
meta_df = pd.read_csv(f'{CFG.data_dir}/class_dict_seg.csv')
#meta_df = meta_df.reset_index()
label_to_name = {t:d['name'] for t,d in meta_df.iterrows()}
name_to_label = {v:k for k,v in label_to_name.items()}
label_to_bgr = {t:(d[' b'],d[' g'],d[' r']) for t,d in meta_df.iterrows()}
label_to_bgr_lut = {**{i:(0,0,0) for i in range(256)}, **label_to_bgr}
label_to_bgr_lut = np.asarray([list(label_to_bgr_lut.values())],np.uint8)

zz=0
def read_data(image_id):
    image = cv2.imread(f'{CFG.image_dir}/{image_id}.jpg',cv2.IMREAD_GRAYSCALE)
    mask  = cv2.imread(f'{CFG.mask_dir}/{image_id}.png',cv2.IMREAD_GRAYSCALE) 
    return image, mask

def null_augment(image, mask):
    image = cv2.resize(image, dsize=(CFG.width,CFG.height),interpolation=cv2.INTER_LINEAR)
    mask  = cv2.resize(mask, dsize=(CFG.width,CFG.height),interpolation=cv2.INTER_NEAREST)
    return image, mask


def make_fold(fold=0):
    glob_file = glob(f'{CFG.mask_dir}/*.png')
    image_id = [f.split('/')[-1][:-4] for f in glob_file]

    rs = np.random.RandomState(123)
    rs.shuffle(image_id)

    df = pd.DataFrame(data={
        'image_id' : image_id,
        'fold'  : -1,
        'split' : 'train',
    })
    df.loc[350:, 'split'] = 'test'
    df.loc[:349, 'fold'] =  np.arange(350)%5
    df = df.sort_values('image_id')

    #----
    eval_df = df[df.fold==-1].reset_index(drop=True)
    train_df = df[df.fold!=fold].reset_index(drop=True)
    valid_df = df[df.fold==fold].reset_index(drop=True)
    return train_df, valid_df, eval_df

class DroneDataset(Dataset):
    def __init__(self, df, augment=null_augment):
        self.df = df
        self.augment = augment
        self.length = len(self.df)

    def __str__(self):
        string = ''
        string += f'\tlen = {len(self)}\n'
        return string

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        d = self.df.iloc[index]

        image, mask = read_data(d.image_id)

        if self.augment is not None:
            image, mask  = self.augment(image, mask)

        r = {}
        r['index'] = index
        r['d'    ] = d
        r['mask' ] = torch.from_numpy(mask).byte()
        r['image'] = torch.from_numpy(image).byte()
        return r

def null_collate(batch):
    batch_size = len(batch)
    d = {}
    key = batch[0].keys()
    for k in key:
        d[k] = [b[k] for b in batch]
    d['image'] =  torch.stack(d['image'] ).unsqueeze(1)
    d['mask'] =  torch.stack(d['mask'] ).unsqueeze(1)
    return d


def run_check_dataset():

    train_df, valid_df, eval_df= make_fold(fold=0)
    dataset = DroneDataset(valid_df)
    print(dataset)

    for i in range(5):
        r = dataset[i]
        print(r['index'], '--------------------')
        print(r["d"], '\n')
        for k in ['image','mask']:
            v = r[k]
            print(k)
            print('\t', 'dtype:', v.dtype)
            print('\t', 'shape:', v.shape)
            if len(v)!=0:
                print('\t', 'min/max:', v.min().item(),'/', v.max().item())
                print('\t', 'is_contiguous:', v.is_contiguous())
                print('\t', 'values:')
                print('\t\t', v.reshape(-1)[:5].data.numpy().tolist(), '...')
                print('\t\t', v.reshape(-1)[-5:].data.numpy().tolist())
        print('')
        if 1:
            image = r['image'].data.cpu().numpy()
            mask = r['mask'].data.cpu().numpy()

            #draw
            overlay = np.dstack([
                cv2.LUT(mask,label_to_bgr_lut.T[i] ) for i in range(3)
            ])
            #image_show('image',image)
            #image_show('overlay',overlay)
            #image_show_norm('mask',mask)
            #cv2.waitKey(0)
            plt.imshow(image,cmap='gray');plt.show()
            plt.imshow(mask,cmap='gray');plt.show()
            plt.imshow(overlay[...,::-1]);plt.show()
            



    loader = DataLoader(
        dataset,
        sampler=SequentialSampler(dataset),
        batch_size=8,
        drop_last=True,
        num_workers=0,
        pin_memory=False,
        worker_init_fn=lambda id: np.random.seed(torch.initial_seed() // 2 ** 32 + id),
        collate_fn=null_collate,
    )
    print(f'batch_size   : {loader.batch_size}')
    print(f'len(loader)  : {len(loader)}')
    print(f'len(dataset) : {len(dataset)}')
    print('')

    for t, batch in enumerate(loader):
        if t > 3: break
        print('batch ', t, '===================')
        print('index', batch['index'])

        for k in ['image','mask']:
            v = batch[k]
            print(f'{k}:')
            print('\t', v.shape)
            print('\t', 'is_contiguous:', v.is_contiguous())

        if 1:
            pass
        print('')
        
if CFG.debug_level in ['debug1']:
    run_check_dataset()
    
	len = 70

0 --------------------
image_id      001
fold            0
split       train
Name: 0, dtype: object 

image
	 dtype: torch.uint8
	 shape: torch.Size([512, 512])
	 min/max: 8 / 249
	 is_contiguous: True
	 values:
		 [107, 111, 121, 105, 100] ...
		 [56, 57, 56, 53, 64]
mask
	 dtype: torch.uint8
	 shape: torch.Size([512, 512])
	 min/max: 0 / 22
	 is_contiguous: True
	 values:
		 [0, 0, 0, 0, 0] ...
		 [20, 20, 20, 20, 20]

1 --------------------
image_id      002
fold            0
split       train
Name: 1, dtype: object 

image
	 dtype: torch.uint8
	 shape: torch.Size([512, 512])
	 min/max: 2 / 255
	 is_contiguous: True
	 values:
		 [69, 73, 90, 71, 53] ...
		 [5, 5, 4, 4, 5]
mask
	 dtype: torch.uint8
	 shape: torch.Size([512, 512])
	 min/max: 0 / 22
	 is_contiguous: True
	 values:
		 [0, 8, 8, 8, 8] ...
		 [2, 2, 2, 2, 2]

2 --------------------
image_id      013
fold            0
split       train
Name: 2, dtype: object 

image
	 dtype: torch.uint8
	 shape: torch.Size([512, 512])
	 min/max: 1 / 243
	 is_contiguous: True
	 values:
		 [131, 126, 123, 126, 131] ...
		 [4, 3, 3, 3, 3]
mask
	 dtype: torch.uint8
	 shape: torch.Size([512, 512])
	 min/max: 0 / 22
	 is_contiguous: True
	 values:
		 [0, 1, 1, 1, 1] ...
		 [2, 2, 2, 2, 2]

3 --------------------
image_id      014
fold            0
split       train
Name: 3, dtype: object 

image
	 dtype: torch.uint8
	 shape: torch.Size([512, 512])
	 min/max: 7 / 255
	 is_contiguous: True
	 values:
		 [134, 118, 112, 101, 113] ...
		 [129, 125, 129, 143, 148]
mask
	 dtype: torch.uint8
	 shape: torch.Size([512, 512])
	 min/max: 0 / 22
	 is_contiguous: True
	 values:
		 [0, 3, 3, 3, 3] ...
		 [0, 0, 0, 0, 0]

4 --------------------
image_id      056
fold            0
split       train
Name: 4, dtype: object 

image
	 dtype: torch.uint8
	 shape: torch.Size([512, 512])
	 min/max: 2 / 243
	 is_contiguous: True
	 values:
		 [67, 84, 81, 87, 86] ...
		 [67, 66, 67, 59, 72]
mask
	 dtype: torch.uint8
	 shape: torch.Size([512, 512])
	 min/max: 0 / 22
	 is_contiguous: True
	 values:
		 [3, 3, 3, 3, 3] ...
		 [3, 3, 3, 3, 3]

batch_size   : 8
len(loader)  : 8
len(dataset) : 70

batch  0 ===================
index [0, 1, 2, 3, 4, 5, 6, 7]
image:
	 torch.Size([8, 1, 512, 512])
	 is_contiguous: True
mask:
	 torch.Size([8, 1, 512, 512])
	 is_contiguous: True

batch  1 ===================
index [8, 9, 10, 11, 12, 13, 14, 15]
image:
	 torch.Size([8, 1, 512, 512])
	 is_contiguous: True
mask:
	 torch.Size([8, 1, 512, 512])
	 is_contiguous: True

batch  2 ===================
index [16, 17, 18, 19, 20, 21, 22, 23]
image:
	 torch.Size([8, 1, 512, 512])
	 is_contiguous: True
mask:
	 torch.Size([8, 1, 512, 512])
	 is_contiguous: True

batch  3 ===================
index [24, 25, 26, 27, 28, 29, 30, 31]
image:
	 torch.Size([8, 1, 512, 512])
	 is_contiguous: True
mask:
	 torch.Size([8, 1, 512, 512])
	 is_contiguous: True

In [19]:
def get_learning_rate(optimizer):
    lr=[]
    for param_group in optimizer.param_groups:
       lr += [ param_group['lr'] ]  
    return lr

def format_short_e(x):
    s = f'{x: 0.2e}'
    s = s.replace('e+0','e+')
    s = s.replace('e-0','e-')
    return s

def time_to_str(t, mode='min'):
    if mode=='min':
        t  = int(t)/60
        hr = t//60
        min = t%60
        return '%2d hr %02d min'%(hr,min)

    elif mode=='sec':
        t   = int(t)
        min = t//60
        sec = t%60
        return '%2d min %02d sec'%(min,sec)

    else:
        raise NotImplementedError

def mask_to_overlay(mask): 
    overlay = np.dstack([
        cv2.LUT(mask,label_to_bgr_lut.T[i] ) for i in range(3)
    ])
    return overlay
        
#======================================

def do_valid(net, valid_loader, iteration):
    valid_num = 0
    valid = {
        'label_loss': 0,
    }

    net = net.eval()
    start_timer = timer()
    for t, batch in enumerate(valid_loader):
        net.output_type = ['inference', 'loss']
        with torch.no_grad():
            with torch.cuda.amp.autocast(enabled = (CFG.device!='device')):
                batch_size = len(batch['index'])
                for k in ['image', 'mask']: batch[k] = batch[k].to(CFG.device)

                output = net(batch)  # data_parallel(net, batch) #

        valid['label_loss'] += output['label_loss'].mean().item()*batch_size
        valid_num += batch_size
        
        if CFG.debug_level in ['debug1']: #show some results
            if t==0:
                image = batch['image'].data.cpu().numpy()
                mask = batch['mask'].data.cpu().numpy().astype(np.uint8)
                probability = output['probability'].data.cpu().numpy()
                predict = probability.argmax(1).astype(np.uint8)
                for b in range(3):
                    m = image[b,0]
                    m = cv2.cvtColor(m,cv2.COLOR_GRAY2BGR)
                    t = mask[b,0]
                    p = predict[b]
                    #print(m.shape,t.shape,p.shape)
                    t = mask_to_overlay(t) 
                    p = mask_to_overlay(p) 
                    
                    overlay = np.hstack([m,t,p])
                    plt.imshow(overlay);plt.show()

        # ---
        print('\r %8d / %d  %s' % (valid_num, len(valid_loader.dataset), time_to_str(timer() - start_timer, 'sec')),
              end='', flush=True)   
    print('')
    assert (valid_num == len(valid_loader.dataset)) 
    loss = valid['label_loss']/valid_num

    return [loss, 0, 0, 0]


def run_train():
     
    os.makedirs(f'{CFG.fold_dir}/checkpoint',exist_ok=True)

    ## dataset ***   
    train_df, valid_df, eval_df = make_fold(CFG.fold)
    train_df = train_df[:50] ##for demo only. uncomment this for actual use
    
    train_dataset = DroneDataset(train_df, null_augment)
    valid_dataset = DroneDataset(valid_df, null_augment)

    train_loader = DataLoader(
        train_dataset,
        sampler = RandomSampler(train_dataset),
        # sampler  = BalanceSampler(train_dataset),
        batch_size = CFG.batch_size,
        drop_last=True,
        num_workers=CFG.num_workers,
        pin_memory=False,
        worker_init_fn=lambda id: np.random.seed(torch.initial_seed() // 2 ** 32 + id),
        collate_fn=null_collate,
    )
    valid_loader = DataLoader(
        valid_dataset,
        sampler=SequentialSampler(valid_dataset),
        batch_size=CFG.batch_size,
        drop_last=False,
        num_workers=CFG.num_workers,
        pin_memory=False,
        collate_fn=null_collate,
    )

    print(f'fold = {CFG.fold}')
    print(f'train_dataset =\n{str(train_dataset)}')
    print(f'valid_dataset =\n{str(valid_dataset)}')
    print('\n')

    
    ## net *** 
    scaler = torch.cuda.amp.GradScaler(enabled=(CFG.device!='cpu'))
    net = Net()
    net.load_pretrain()
    start_iteration = 0
    start_epoch = 0

    print(f'\tinitial_checkpoint = {CFG.initial_checkpoint}')
    if CFG.initial_checkpoint is not None:
        f = torch.load(CFG.initial_checkpoint, map_location=lambda storage, loc: storage)
        print(net.load_state_dict(f['state_dict'], strict=False))  # True
        start_iteration = f.get('iteration', 0)
        start_epoch = f.get('epoch', 0)

    print(f'device = {CFG.device}')
    net.to(CFG.device)

    ## optimiser *** 
    optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, net.parameters()),lr=CFG.start_lr)
    print(f'optimizer =\n  {optimizer}')


    num_iteration = len(train_loader)*CFG.num_epoch
    iter_log   = int(len(train_loader)*CFG.iter_log)
    iter_valid = int(len(train_loader)*CFG.iter_valid)
    iter_save  = int(len(train_loader)*CFG.iter_save)

    print('')
    print('                           | valiation                       | batch/training           |')
    print('rate    iteration    epoch | loss                            | loss                     | ')
    print('==========================================================================================================')  
    def message(mode='print'):
        asterisk = ' '
        if mode == ('print'):
            loss = batch_loss
        if mode == ('log'):
            loss = train_loss
            if (iteration % iter_save == 0): asterisk = '*'

        text = ''
        text += f'{format_short_e(rate)} {iteration:08d} {asterisk} {epoch:0.2f}  ' + '| '
        text += ''.join( f'{v:4.4f}  ' for v in valid_loss ) + '| '
        text += ''.join( f'{v:4.4f}  ' for v in loss) + '| '
        text += f'{time_to_str(timer() - start_timer, "min")}'
        return text

    # ----
    valid_loss = np.zeros(4, np.float32)
    train_loss = np.zeros(3, np.float32)
    batch_loss = np.zeros_like(train_loss)
    sum_train_loss = np.zeros_like(train_loss)
    sum_train = 0

    start_timer = timer()
    iteration = start_iteration
    epoch = start_epoch
    rate = 0
    while iteration < num_iteration:
        for t, batch in enumerate(train_loader):

            if iteration % iter_save == 0:
                if iteration != start_iteration:
                    torch.save({
                        'state_dict': net.state_dict(),
                        'iteration': iteration,
                        'epoch': epoch,
                    }, CFG.fold_dir + f'/checkpoint/{iteration:08d}.model.pth')


            if (iteration % iter_valid == 0):  # or (t==len(train_loader)-1):
                # if iteration!=start_iteration:
                valid_loss = do_valid(net, valid_loader, f'{iteration:08d}')  #
                pass

            if (iteration % iter_log == 0) or (iteration % iter_valid == 0):
                print('\r', end='', flush=True)
                #log.write(message(mode='log') + '\n')

            # learning rate schduler ------------
            # adjust_learning_rate(optimizer, scheduler(epoch))
            rate = get_learning_rate(optimizer)[0]  # scheduler.get_last_lr()[0] #get_learning_rate(optimizer)

            # one iteration update  -------------
            batch_size = len(batch['index'])
            for k in ['image','mask']: batch[k] = batch[k].to(CFG.device)
            batch['image'] = batch['image'].float() / 255

            net.train()
            net.output_type = ['loss', 'inference']
            # with torch.autograd.set_detect_anomaly(True):
            if 1:
                with torch.cuda.amp.autocast(enabled=(CFG.device!='cpu')):
                    output = net(batch) #data_parallel(net, batch)
                    loss0 = output['label_loss'].mean()


                optimizer.zero_grad()
                scaler.scale(loss0).backward()

                # scaler.unscale_(optimizer)
                # torch.nn.utils.clip_grad_norm_(net.parameters(), 2)
                scaler.step(optimizer)
                scaler.update()

            # print statistics  --------
            batch_loss[:1] = [loss0.item()]
            sum_train_loss += batch_loss
            sum_train += 1
            if t % 100 == 0:
                train_loss = sum_train_loss / (sum_train + 1e-12)
                sum_train_loss[...] = 0
                sum_train = 0

            print('\r', end='', flush=True)
            print(message(mode='print'), end='', flush=True)
            epoch += 1 / len(train_loader)
            iteration += 1

            # debug  --------
            # if 1:
            # if t % 100 == 0:
            # 	show_result(batch, output, resize=0.50)
            # 	cv2.waitKey(1)

        torch.cuda.empty_cache()
        print('')
     

    
run_train()
fold = 0
train_dataset =
	len = 50

valid_dataset =
	len = 70



load /kaggle/input/drone-image-segmentation-notebook-demo/mit_b0.pth
_IncompatibleKeys(missing_keys=[], unexpected_keys=['head.weight', 'head.bias'])
	initial_checkpoint = None
device = cuda
optimizer =
  AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    eps: 1e-08
    foreach: None
    lr: 0.001
    maximize: False
    weight_decay: 0.01
)

                           | valiation                       | batch/training           |
rate    iteration    epoch | loss                            | loss                     | 
==========================================================================================================
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
       70 / 70   0 min 23 sec
 1.00e-3 00000009   0.90  | 3.1557  0.0000  0.0000  0.0000  | 2.4414  0.0000  0.0000  |  0 hr 00 min
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
       70 / 70   0 min 21 sec
 1.00e-3 00000019   1.90  | 2.3986  0.0000  0.0000  0.0000  | 1.9239  0.0000  0.0000  |  0 hr 01 min
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
       70 / 70   0 min 21 sec
 1.00e-3 00000029   2.90  | 2.5717  0.0000  0.0000  0.0000  | 2.1340  0.0000  0.0000  |  0 hr 01 min
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
       70 / 70   0 min 22 sec
 1.00e-3 00000039   3.90  | 2.6633  0.0000  0.0000  0.0000  | 2.2358  0.0000  0.0000  |  0 hr 02 min
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
       70 / 70   0 min 21 sec
 1.00e-3 00000049   4.90  | 3.8151  0.0000  0.0000  0.0000  | 2.0833  0.0000  0.0000  |  0 hr 02 min
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
       70 / 70   0 min 21 sec
 1.00e-3 00000059   5.90  | 2.7950  0.0000  0.0000  0.0000  | 1.5555  0.0000  0.0000  |  0 hr 03 min
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
       70 / 70   0 min 21 sec
 1.00e-3 00000069   6.90  | 3.7249  0.0000  0.0000  0.0000  | 1.9129  0.0000  0.0000  |  0 hr 04 min
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
       70 / 70   0 min 21 sec
 1.00e-3 00000079   7.90  | 3.5498  0.0000  0.0000  0.0000  | 1.6567  0.0000  0.0000  |  0 hr 04 min
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
       70 / 70   0 min 21 sec
 1.00e-3 00000089   8.90  | 3.5584  0.0000  0.0000  0.0000  | 1.0749  0.0000  0.0000  |  0 hr 05 min
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
(512, 512, 3) (512, 512) (512, 512)
       70 / 70   0 min 21 sec
 1.00e-3 00000099   9.90  | 3.3584  0.0000  0.0000  0.0000  | 1.6065  0.0000  0.0000  |  0 hr 05 min